/** * Copyright (C) 2013 HalZhang */ package com.halzhang.android.startupnews.data.parser; import android.text.TextUtils; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.nodes.TextNode; import org.w3c.dom.Node; import java.net.URI; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import javax.xml.xpath.XPath; import javax.xml.xpath.XPathConstants; /** * StartupNews * <p> * html解析 * </p> * * @author <a href="http://weibo.com/halzhang">Hal</a> * @version Mar 18, 2013 */ public abstract class BaseHTMLParser<T> { public static final Pattern CREATEAT_PATTERN = Pattern.compile("\\d{1,2}\\s\\w+\\sago"); public static final int UNDEFINED = -1; public T parse(String input) throws Exception { return parseDocument(Jsoup.parse(input)); } public abstract T parseDocument(Document doc) throws Exception; public static String getDomainName(String url) { URI uri; try { uri = new URI(url); String domain = uri.getHost(); return domain.startsWith("www.") ? domain.substring(4) : domain; } catch (Exception e) { return url; } } public static <T extends Object> T getSafe(List<T> list, int index) { if (list.size() - 1 >= index) { return list.get(index); } else { return null; } } public static String getFirstTextValueInElementChildren(Element element) { if (element == null) { return ""; } for (org.jsoup.nodes.Node node : element.childNodes()) { if (node instanceof TextNode) { return ((TextNode) node).text(); } } return ""; } public static String getStringValue(String query, Node source, XPath xpath) { try { return ((Node) xpath.evaluate(query, source, XPathConstants.NODE)).getNodeValue(); } catch (Exception e) { // TODO insert Google Analytics tracking here? } return ""; } public static Integer getIntValueFollowedBySuffix(String value, String suffix) { if (value == null || suffix == null) return 0; int suffixWordIdx = value.indexOf(suffix); if (suffixWordIdx >= 0) { String extractedValue = value.substring(0, suffixWordIdx); try { return Integer.parseInt(extractedValue); } catch (NumberFormatException e) { return UNDEFINED; } } return UNDEFINED; } public static String getStringValuePrefixedByPrefix(String value, String prefix) { int prefixWordIdx = value.indexOf(prefix); if (prefixWordIdx >= 0) { return value.substring(prefixWordIdx + prefix.length()); } return null; } public static String resolveRelativeSNURL(String url) { if (TextUtils.isEmpty(url)) { return null; } String snurl = "http://news.dbanotes.net/"; if (url.startsWith("http") || url.startsWith("ftp")) { return url; } else if (url.startsWith("/")) { return snurl + url.substring(1); } else { return snurl + url; } } public String getCreateAt(String text) { if (TextUtils.isEmpty(text)) { return null; } Matcher matcher = CREATEAT_PATTERN.matcher(text); if (matcher.find()) { return matcher.group(); } return null; } }